### Analyses of terrestrial invertebrate data: composition, alpha diversity, beta diversity ###

library(dplyr)
library(tidyr)

###----------------------###
### Ordination ###
# vegan must be attached before metaMDS() is called below; the plotting
# section further down only attaches it afterwards
library(vegan)

# load and prep data
abun.terr <- read.csv("Terrestrial invertebrate abundance data.csv")
# remove channel with low sample coverage (see the alpha diversity section)
abun.terr <- abun.terr %>% filter(Channel != 12)

# channel-level metadata, built directly as a data frame so that column types
# are kept (cbind() would coerce everything through a character matrix and
# turn factor columns into their integer codes)
env.terr <- data.frame(Channel = abun.terr$Channel,
                       Treatment = abun.terr$Treatment)

# community matrix: first column (Channel) becomes the row names, Treatment is
# dropped so that only species columns remain
abun.terr <- data.frame(abun.terr[, -1], row.names = abun.terr[, 1])
abun.terr <- select(abun.terr, -Treatment)

# run NMDS (all metaMDS arguments left at their defaults)
nmds.terr <- metaMDS(abun.terr)

# load species quality scores (downloaded from Pantheon)
SQS.terr <- read.csv("Rarity scores.csv")

### PLOT ###
# load packages
library(vegan)
library(ggplot2)
library(ggrepel)

# extract sample (channel) scores and attach channel / treatment labels
channel.scores <- nmds.terr$points %>%
  scores() %>%
  as.data.frame()
channel.scores$Channel <- rownames(channel.scores)
channel.scores$Treatment <- env.terr$Treatment

# extract species scores and keep the species name as a column
species.scores <- as.data.frame(scores(nmds.terr, "species"))
species.scores$Species <- rownames(species.scores)

# create vector of taxonomic orders/classes for plotting
# NOTE(review): assumes the species columns are ordered by these groups with
# exactly these counts -- confirm against the abundance CSV
Group <- rep(
  c("Arachnida", "Coleoptera", "Hemiptera", "Hymenoptera", "Diptera", "Other"),
  times = c(29, 37, 32, 11, 38, 19)
)

# species scores plus taxonomic group and species quality score (SQS);
# rows are matched by position, not by name
species.scores2 <- cbind(species.scores, Group, SQS = SQS.terr$SQS)

# create convex hulls for each treatment
# helper: rows of channel.scores for one treatment that lie on the convex hull
# of that treatment's (MDS1, MDS2) points, in hull order
treatment.hull <- function(treatment) {
  pts <- channel.scores[channel.scores$Treatment == treatment, ]
  pts[chull(pts[, c("MDS1", "MDS2")]), ]
}

treat.con <- treatment.hull("CON")
treat.frag <- treatment.hull("FRAG")
treat.dry <- treatment.hull("DRY")

# stack the three hulls and fix the treatment order used in the plot
hull.data <- rbind(treat.con, treat.frag, treat.dry)
hull.data$Treatment <- factor(hull.data$Treatment,
                              levels = c("CON", "FRAG", "DRY"),
                              ordered = TRUE)

# shortlist rare species for labelling (species quality score above 1)
rare.species <- filter(species.scores2, SQS > 1)

# display labels: dotted name -> name with a space, for the listed species only
species.labels <- c(
  "Xysticus.sabulosus" = "Xysticus sabulosus",
  "Sardinidion.blackwalli" = "Sardinidion blackwalli",
  "Panamomops.sulcifrons" = "Panamomops sulcifrons",
  "Gymnetron.veronicae" = "Gymnetron veronicae",
  "Drupenatus.nasturtii" = "Drupenatus nasturtii",
  "Neophytobius.muricatus" = "Neophytobius muricatus",
  "Rhynchites.longiceps" = "Rhynchites longiceps",
  "Agnocoris.reclairei" = "Agnocoris reclairei",
  "Microphor.crassipes" = "Microphor crassipes"
)

# any species not in the lookup keeps its original name
rare.species2 <- rare.species
relabel <- rare.species2$Species %in% names(species.labels)
rare.species2$Species[relabel] <-
  unname(species.labels[rare.species2$Species[relabel]])

# plot
# legend entries, colours and point shapes, listed in alphabetical group order
group.labels <- c("Arachnida", "Coleoptera", "Diptera", "Hemiptera", "Hymenoptera", "Other")
group.colours <- c("darkgoldenrod", "chartreuse3", "deepskyblue2", "firebrick", "goldenrod1", "purple")
group.shapes <- c(8, 15, 19, 17, 11, 4)

ggplot() +
  # treatment hulls drawn from the channel (site) scores
  geom_polygon(
    data = hull.data,
    mapping = aes(x = MDS1, y = MDS2, fill = Treatment, group = Treatment),
    alpha = 0.3
  ) +
  # one point per species, coloured and shaped by taxonomic group
  geom_point(
    data = species.scores2,
    mapping = aes(x = NMDS1, y = NMDS2, color = Group, shape = factor(Group)),
    size = 3,
    alpha = 0.6
  ) +
  # labels for the shortlisted rare species only
  geom_label_repel(
    data = rare.species2,
    mapping = aes(x = NMDS1, y = NMDS2, label = Species, fontface = "italic"),
    box.padding = 0.5,
    size = 2.5
  ) +
  coord_equal() +
  scale_colour_manual(name = "Group", labels = group.labels, values = group.colours) +
  scale_shape_manual(name = "Group", labels = group.labels, values = group.shapes) +
  theme(text = element_text(size = 12))

###----------------------###
### alpha diversity ###

# load packages
library(vegan)
library(iNEXT)

# load and prep invertebrate data: drop Treatment, then use the first column
# (Channel) as row names
abun.terr <- read.csv("Terrestrial invertebrate abundance data.csv") %>%
  select(-Treatment)
abun.terr <- data.frame(abun.terr[, -1], row.names = abun.terr[, 1])

# transpose so that channels are columns (named X<channel>) and species are rows
abun.terr.trans <- data.frame(t(abun.terr))

# calculate sample coverage using iNEXT package
# range of coverage values would suggest standardising at 90% rather than 95%
terr.coverage <- DataInfo(abun.terr.trans)

# channel 12 has a very low coverage value, so the sample appears to be a poor
# representation of the community; remove it from all analyses (also done in
# the ordination section)
abun.terr.trans <- select(abun.terr.trans, -X12)

# calculate Hill numbers at 90% coverage
terr.HillDiv.90 <- estimateD(
  abun.terr.trans,
  datatype = "abundance",
  base = "coverage",
  level = 0.90,
  conf = 0.95
)

# we are not interested in Hill-richness (q=0) so can ignore the warning about prediction bias

# select Hill-Shannon rows (q = 1)
terr.HillDiv.90.q1 <- filter(terr.HillDiv.90, Order.q == 1)

# vector of channel treatments, updated for the removal of channel 12
# NOTE(review): assumes the rows of terr.HillDiv.90.q1 are ordered CON, FRAG,
# DRY with these counts -- confirm against the channel order in the data
Treatment <- rep(c("CON", "FRAG", "DRY"), times = c(5, 9, 3))

# fail loudly instead of silently recycling if the channel count has changed
stopifnot(length(Treatment) == nrow(terr.HillDiv.90.q1))

# format table of results
# built directly as a data frame so Hill.Shannon stays numeric; going through
# cbind() makes a character matrix and the text round trip can drop digits
terr.diversity <- data.frame(
  Treatment = Treatment,
  Hill.Shannon = terr.HillDiv.90.q1$qD
)

### test for significant difference in diversity between treatments
# load package
library(conover.test)

# Kruskal-Wallis test of Hill-Shannon diversity across treatments
# (the original analysis notes a significant difference)
kruskal.test(Hill.Shannon ~ Treatment, data = terr.diversity)

# Conover-Iman pairwise comparisons with method = "bh" p-value adjustment
# (the original analysis notes significant differences among treatment pairs)
with(terr.diversity, conover.test(Hill.Shannon, Treatment, method = "bh"))

# calculate mean and SE for reporting purposes
# se(): standard error of the mean, sd(x) / sqrt(n)
#   x      numeric vector
#   na.rm  if TRUE, NA values are dropped before both sd() and the count n;
#          the default FALSE keeps the previous behaviour (any NA gives NA)
# returns a single numeric value
se <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
# per-treatment summary; note that se_HS holds twice the standard error
terr.diversity %>%
  group_by(Treatment) %>%
  summarise(
    se_HS = 2 * se(Hill.Shannon),
    mean_HS = mean(Hill.Shannon, na.rm = TRUE)
  )


###----------------------###
### beta diversity ### 
library(betapart)

### 1. pairwise beta diversity between treatment pairs

# 1a. CON vs FRAG

# helper for the resampling below: draw n rows of df at random, without
# replacement; always returns a data frame, even if df has a single column
sample.df <- function(df, n) {
  picked <- sample(nrow(df), n)
  df[picked, , drop = FALSE]
}

# Pool n randomly drawn channels from each of CON and FRAG and return the two
# pooled communities as presence-absence.
#   x  data frame with Treatment as the first column, followed by numeric
#      species columns
#   n  number of channels drawn per treatment (default 3, as used below)
# returns an integer 0/1 matrix with rows "consum" and "fragsum" and one
# column per species
sample.terr.sums1 <- function(x, n = 3) {
  scon <- sample.df(subset(x, Treatment == "CON"), n)
  sfrag <- sample.df(subset(x, Treatment == "FRAG"), n)
  # drop the Treatment column; drop = FALSE keeps a data frame even when only
  # one species column is left, so colSums() still works
  consum <- colSums(scon[, -1, drop = FALSE])
  fragsum <- colSums(sfrag[, -1, drop = FALSE])
  # any pooled count above zero counts as present
  (rbind(consum, fragsum) > 0) * 1L
}

# NOTE(review): PA.terr is not created anywhere in this script. If it is not
# already in the workspace, rebuild it from the abundance data in the layout
# the samplers rely on: Treatment as the first column, then one 0/1 column per
# species, channels as row names, channel 12 excluded. Confirm this matches
# the object used in the original analysis.
if (!exists("PA.terr")) {
  terr.raw <- read.csv("Terrestrial invertebrate abundance data.csv")
  terr.raw <- terr.raw[which(terr.raw$Channel != 12), ]
  terr.species <- terr.raw[, setdiff(names(terr.raw), c("Channel", "Treatment")),
                           drop = FALSE]
  PA.terr <- data.frame(Treatment = terr.raw$Treatment,
                        (terr.species > 0) * 1L,
                        row.names = terr.raw$Channel)
}

# calculate pairwise beta diversity with 100 replicates of pooled subsets
# NOTE(review): no set.seed() call is visible in this script, so the resampled
# summaries will differ slightly between runs
beta.pairwise.1 <- replicate(
  100,
  beta.pair(sample.terr.sums1(PA.terr), index.family = "sorensen")
)
# one row per replicate, one column per beta.pair() component
beta.pairwise.1 <- data.frame(t(beta.pairwise.1))
# each cell holds a single dissimilarity value; convert the columns to numeric
beta.pairwise.1 <- mutate_all(beta.pairwise.1, function(x) as.numeric(as.character(x)))

# mean of the turnover (beta.sim) and nestedness (beta.sne) components; the
# SE_ columns hold twice the standard error across replicates
beta.confrag <- beta.pairwise.1 %>%
  summarise(mean_βsim = mean(beta.sim), SE_βsim = 2 * se(beta.sim),
            mean_βnes = mean(beta.sne), SE_βnes = 2 * se(beta.sne))



# 1b. CON vs DRY

# Pool n randomly drawn channels from each of CON and DRY and return the two
# pooled communities as presence-absence.
#   x  data frame with Treatment as the first column, followed by numeric
#      species columns
#   n  number of channels drawn per treatment (default 3, as used below)
# returns an integer 0/1 matrix with rows "consum" and "drysum" and one
# column per species
sample.terr.sums2 <- function(x, n = 3) {
  scon <- sample.df(subset(x, Treatment == "CON"), n)
  sdry <- sample.df(subset(x, Treatment == "DRY"), n)
  # drop the Treatment column; drop = FALSE keeps a data frame even when only
  # one species column is left, so colSums() still works
  consum <- colSums(scon[, -1, drop = FALSE])
  drysum <- colSums(sdry[, -1, drop = FALSE])
  # any pooled count above zero counts as present
  (rbind(consum, drysum) > 0) * 1L
}

# calculate pairwise beta diversity with 100 replicates of pooled subsets
# (replicates transposed to rows, then every column converted to numeric)
beta.pairwise.2 <- replicate(
  100,
  beta.pair(sample.terr.sums2(PA.terr), index.family = "sorensen")
) %>%
  t() %>%
  data.frame() %>%
  mutate_all(function(column) as.numeric(as.character(column)))

# replicate means; the SE_ columns hold twice the standard error
beta.condry <- summarise(
  beta.pairwise.2,
  mean_βsim = mean(beta.sim), SE_βsim = 2 * se(beta.sim),
  mean_βnes = mean(beta.sne), SE_βnes = 2 * se(beta.sne)
)


# 1c. FRAG vs DRY

# Pool n randomly drawn channels from each of FRAG and DRY and return the two
# pooled communities as presence-absence.
#   x  data frame with Treatment as the first column, followed by numeric
#      species columns
#   n  number of channels drawn per treatment (default 3, as used below)
# returns an integer 0/1 matrix with rows "fragsum" and "drysum" and one
# column per species
sample.terr.sums3 <- function(x, n = 3) {
  sfrag <- sample.df(subset(x, Treatment == "FRAG"), n)
  sdry <- sample.df(subset(x, Treatment == "DRY"), n)
  # drop the Treatment column; drop = FALSE keeps a data frame even when only
  # one species column is left, so colSums() still works
  fragsum <- colSums(sfrag[, -1, drop = FALSE])
  drysum <- colSums(sdry[, -1, drop = FALSE])
  # any pooled count above zero counts as present
  (rbind(fragsum, drysum) > 0) * 1L
}

# calculate pairwise beta diversity with 100 replicates of pooled subsets
# (replicates transposed to rows, then every column converted to numeric)
beta.pairwise.3 <- replicate(
  100,
  beta.pair(sample.terr.sums3(PA.terr), index.family = "sorensen")
) %>%
  t() %>%
  data.frame() %>%
  mutate_all(function(column) as.numeric(as.character(column)))

# replicate means; the SE_ columns hold twice the standard error
beta.fragdry <- summarise(
  beta.pairwise.3,
  mean_βsim = mean(beta.sim), SE_βsim = 2 * se(beta.sim),
  mean_βnes = mean(beta.sne), SE_βnes = 2 * se(beta.sne)
)


### combine the three sets of pairwise results
library(reshape2)

# one row per treatment pairing
terr.pairwise.table <- data.frame(rbind(beta.confrag, beta.condry, beta.fragdry))
rownames(terr.pairwise.table) <- c("CON-FRAG", "CON-DRY", "FRAG-DRY")

# reshape to long form, split names such as "mean_βsim" at the underscore into
# metric and beta, then spread the metrics back out into their own columns
terr.pairwise.table <- terr.pairwise.table %>%
  as.matrix() %>%
  melt() %>%
  separate(col = Var2, into = c("metric", "beta"), sep = "_") %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  rename(Pairing = 1)

# fix the component order used on the x axis
terr.pairwise.table$beta <- ordered(terr.pairwise.table$beta, levels = c("βsim", "βnes"))

### plot
# dodged bars of mean pairwise dissimilarity per component and pairing, with
# error bars of +/- the SE column
pairwise.plot <- ggplot(
  data = terr.pairwise.table,
  mapping = aes(x = beta, y = mean, fill = Pairing)
) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.6) +
  geom_errorbar(
    mapping = aes(ymin = mean - SE, ymax = mean + SE),
    width = 0.4,
    colour = "black",
    position = position_dodge(.9)
  ) +
  scale_fill_manual(values = c("#440154FF", "#21908CFF", "#FDE725FF")) +
  labs(x = "", y = "Pairwise dissimilarity") +
  theme(
    text = element_text(size = 20),
    legend.position = c(.92, .78)
  )


###---------------------------------------------------###
### 2. treatment-specific multiple site beta diversity

# 2a. CON

# Draw a random subset of n CON channels.
#   x  data frame with Treatment as the first column, followed by species
#      columns
#   n  number of channels to draw (default 3, as used below)
# returns the sampled rows as a data frame without the Treatment column
sample.terr.con <- function(x, n = 3) {
  scon <- sample.df(subset(x, Treatment == "CON"), n)
  # drop = FALSE keeps a data frame even when only one species column is left
  scon[, -1, drop = FALSE]
}

# calculate multiple site beta diversity with 100 replicates of channel combinations
# (replicates transposed to rows, then every column converted to numeric)
beta.multi.1 <- replicate(
  100,
  beta.multi(sample.terr.con(PA.terr), index.family = "sorensen")
) %>%
  t() %>%
  data.frame() %>%
  mutate_all(function(column) as.numeric(as.character(column)))

# replicate means; the SE_ columns hold twice the standard error
beta.con <- summarise(
  beta.multi.1,
  mean_βSIM = mean(beta.SIM), SE_βSIM = 2 * se(beta.SIM),
  mean_βNES = mean(beta.SNE), SE_βNES = 2 * se(beta.SNE)
)


# 2b. FRAG

# Draw a random subset of n FRAG channels.
#   x  data frame with Treatment as the first column, followed by species
#      columns
#   n  number of channels to draw (default 3, as used below)
# returns the sampled rows as a data frame without the Treatment column
sample.terr.frag <- function(x, n = 3) {
  sfrag <- sample.df(subset(x, Treatment == "FRAG"), n)
  # drop = FALSE keeps a data frame even when only one species column is left
  sfrag[, -1, drop = FALSE]
}

# calculate multiple site beta diversity with 100 replicates of channel combinations
# (replicates transposed to rows, then every column converted to numeric)
beta.multi.2 <- replicate(
  100,
  beta.multi(sample.terr.frag(PA.terr), index.family = "sorensen")
) %>%
  t() %>%
  data.frame() %>%
  mutate_all(function(column) as.numeric(as.character(column)))

# replicate means; the SE_ columns hold twice the standard error
beta.frag <- summarise(
  beta.multi.2,
  mean_βSIM = mean(beta.SIM), SE_βSIM = 2 * se(beta.SIM),
  mean_βNES = mean(beta.SNE), SE_βNES = 2 * se(beta.SNE)
)

# 2c. DRY

# only 3 channels in this treatment so no need to subset
PA.terr.dry <- select(filter(PA.terr, Treatment == "DRY"), -Treatment)

# calculate multiple site beta diversity
beta.multi.3 <- beta.multi(PA.terr.dry, index.family = "sorensen")

# same layout as beta.con / beta.frag: mean, SE, mean, SE; there is a single
# estimate and no resampling, so both SE entries are set to 0
beta.dry <- with(beta.multi.3, c(beta.SIM, 0, beta.SNE, 0))


### combine the three sets of multiple site results

# one row per treatment; the unnamed beta.dry vector takes the column names of
# the data frames it is bound to
terr.multi.table <- rbind(beta.con, beta.frag, beta.dry)
rownames(terr.multi.table) <- c("CON", "FRAG", "DRY")

# reshape to long form, split names such as "mean_βSIM" at the underscore into
# metric and beta, then spread the metrics back out into their own columns
terr.multi.table <- terr.multi.table %>%
  as.matrix() %>%
  melt() %>%
  separate(col = Var2, into = c("metric", "beta"), sep = "_") %>%
  pivot_wider(names_from = metric, values_from = value) %>%
  rename(Treatment = 1)

# fix the component order used on the x axis
terr.multi.table$beta <- ordered(terr.multi.table$beta, levels = c("βSIM", "βNES"))

### plot
# dodged bars of mean multi-site dissimilarity per component and treatment.
# The error bars use the SE column of terr.multi.table; lower-case `se` is not
# a column and would resolve to the global se() function, which fails when the
# plot is built.
multisite.plot <- ggplot(terr.multi.table, aes(fill = Treatment, x = beta, y = mean)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.6) +
  geom_errorbar(aes(ymin = mean - SE, ymax = mean + SE), width = 0.4, colour = "black",
                position = position_dodge(.9)) +
  scale_fill_manual(values = c("#440154FF", "#21908CFF", "#FDE725FF")) +
  labs(x = "", y = "Multisite dissimilarity") +
  theme(text = element_text(size = 20),
        legend.position = c(.92, .78))
  
# both bar plots together, stacked in one figure
# gridExtra is never attached in this script, so call grid.arrange() through
# its namespace
gridExtra::grid.arrange(pairwise.plot, multisite.plot)

